The goal of this script is to generate a Seurat object for sample GSM3717034.
LogNormalize, then doublets
detection using scran hybrid and scDblFinder
method (but not filtering)LogNormalize, for only the remaining
cellsPCAtSNE and UMAPlibrary(dplyr)
library(patchwork)
library(ggplot2)
.libPaths()
## [1] "/usr/local/lib/R/library"
In this section, we set the global settings of the analysis. We will store data there :
out_dir = "."
We load the parameters :
sample_name = params$sample_name # "GSM3717034"
# sample_name = "GSM3717034"
Input count matrix is there :
count_matrix_dir = paste0(out_dir, "/input/", sample_name, "/")
count_matrix_file = list.files(count_matrix_dir, full.names = TRUE)
count_matrix_file
## [1] "./input/GSM3717034//GSM3717034_DS1-Rie_4_comb_clean.dge.txt.gz"
We load the markers and specific colors for each cell type :
cell_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_cell_markers.rds"))
cell_markers = lapply(cell_markers, FUN = toupper)
lengths(cell_markers)
## CD4 T cells CD8 T cells Langerhans cells macrophages
## 13 13 9 10
## B cells cuticle cortex medulla
## 16 15 16 10
## IRS proliferative IBL ORS
## 16 20 15 16
## IFE HFSC melanocytes sebocytes
## 17 17 10 8
Here are custom colors for each cell type :
color_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_color_markers.rds"))
data.frame(cell_type = names(color_markers),
color = unlist(color_markers)) %>%
ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
ggplot2::geom_point(pch = 21, size = 5) +
ggplot2::scale_fill_manual(values = unlist(color_markers), breaks = names(color_markers)) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position = "none",
axis.line = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank())
We load markers to display on the dotplot :
dotplot_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_dotplot_markers.rds"))
dotplot_markers = lapply(dotplot_markers, FUN = toupper)
dotplot_markers
## $`CD4 T cells`
## [1] "CD3E" "CD4"
##
## $`CD8 T cells`
## [1] "CD3E" "CD8A"
##
## $`Langerhans cells`
## [1] "CD207" "CPVL"
##
## $macrophages
## [1] "TREM2" "MSR1"
##
## $`B cells`
## [1] "CD79A" "CD79B"
##
## $cuticle
## [1] "KRT32" "KRT35"
##
## $cortex
## [1] "KRT31" "PRR9"
##
## $medulla
## [1] "BAMBI" "ADLH1A3"
##
## $IRS
## [1] "KRT71" "KRT73"
##
## $proliferative
## [1] "TOP2A" "MCM5"
##
## $IBL
## [1] "KRT16" "KRT6C"
##
## $ORS
## [1] "KRT15" "GPX2"
##
## $IFE
## [1] "SPINK5" "LY6D"
##
## $HFSC
## [1] "DIO2" "TCEAL2"
##
## $melanocytes
## [1] "DCT" "MLANA"
##
## $sebocytes
## [1] "CLMP" "PPARG"
We load metadata for this sample :
sample_info = readRDS(paste0(out_dir, "/../1_metadata/takahashi_sample_info.rds"))
sample_info %>%
dplyr::filter(project_name == sample_name)
## project_name sample_type sample_identifier color
## 1 GSM3717034 Takahashi_HD Takahashi_HD_1 lawngreen
These is a parameter for different functions :
cl = aquarius::create_parallel_instance(nthreads = 3L)
cut_log_nCount_RNA = 0.5 # almost no filter
cut_nFeature_RNA = 20 # almost no filter
cut_percent.mt = 20
cut_percent.rb = 50
In this section, we load the count matrix.
mat = read.table(count_matrix_file,
header = TRUE, row.names = 1)
# For the two 10X data, this is required
rownames(mat) = stringr::str_remove(rownames(mat),
pattern = "hg19_")
# Seurat object
sobj = Seurat::CreateSeuratObject(counts = mat,
project = sample_name,
assay = "RNA")
rm(mat)
sobj
## An object of class Seurat
## 12060 features across 3000 samples within 1 assay
## Active assay: RNA (12060 features, 0 variable features)
(Time to run : 15.91 s)
We add the same columns as in metadata :
row_oi = (sample_info$project_name == sample_name)
sobj$project_name = sample_name
sobj$sample_identifier = sample_info[row_oi, "sample_identifier"]
sobj$sample_type = sample_info[row_oi, "sample_type"]
colnames(sobj@meta.data)
## [1] "orig.ident" "nCount_RNA" "nFeature_RNA"
## [4] "project_name" "sample_identifier" "sample_type"
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize",
assay = "RNA")
sobj = Seurat::FindVariableFeatures(sobj,
assay = "RNA",
nfeatures = 3000)
sobj
## An object of class Seurat
## 12060 features across 3000 samples within 1 assay
## Active assay: RNA (12060 features, 3000 variable features)
We generate a tSNE to visualize cells before filtering.
sobj = aquarius::dimensions_reduction(sobj = sobj,
assay = "RNA",
reduction = "pca",
max_dims = 100,
verbose = FALSE)
Seurat::ElbowPlot(sobj, ndims = 100, reduction = "RNA_pca")
We generate a tSNE with 20 principal components :
ndims = 20
sobj = Seurat::RunTSNE(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_tsne"),
check_duplicates = FALSE)
sobj
## An object of class Seurat
## 12060 features across 3000 samples within 1 assay
## Active assay: RNA (12060 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We annotate cells for cell type using
Seurat::AddModuleScore function.
sobj = aquarius::cell_annot_custom(sobj,
newname = "cell_type",
markers = cell_markers,
use_negative = TRUE,
add_score = TRUE,
verbose = TRUE)
colnames(sobj@meta.data) = stringr::str_replace_all(string = colnames(sobj@meta.data),
pattern = " ",
replacement = "_")
sobj$cell_type = factor(sobj$cell_type, levels = names(cell_markers))
table(sobj$cell_type)
##
## CD4 T cells CD8 T cells Langerhans cells macrophages
## 52 134 347 105
## B cells cuticle cortex medulla
## 127 224 161 214
## IRS proliferative IBL ORS
## 96 93 154 135
## IFE HFSC melanocytes sebocytes
## 511 352 110 185
(Time to run : 1.03 s)
To justify cell type annotation, we can make a dotplot :
markers = c("PTPRC", "MSX2", "KRT16",
unique(unlist(dotplot_markers[levels(sobj$cell_type)])))
markers = markers[markers %in% rownames(sobj)]
aquarius::plot_dotplot(sobj, assay = "RNA",
column_name = "cell_type",
markers = markers,
nb_hline = 0) +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(legend.position = "right",
legend.box = "vertical",
legend.direction = "vertical",
axis.title = element_blank(),
axis.text = element_text(size = 15))
We can make a barplot to see the composition of each dataset, and visualize cell types on the projection.
df_proportion = as.data.frame(prop.table(table(sobj$orig.ident,
sobj$cell_type)))
colnames(df_proportion) = c("orig.ident", "cell_type", "freq")
quantif = table(sobj$orig.ident) %>%
as.data.frame.table() %>%
`colnames<-`(c("orig.ident", "nb_cells"))
# Plot
plot_list = list()
plot_list[[2]] = aquarius::plot_barplot(df = df_proportion,
x = "orig.ident",
y = "freq",
fill = "cell_type",
position = ggplot2::position_fill()) +
ggplot2::scale_fill_manual(name = "Cell type",
values = color_markers[levels(df_proportion$cell_type)],
breaks = levels(df_proportion$cell_type)) +
ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
aes(x = orig.ident, y = 1.05, label = nb_cells),
label.size = 0)
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cell_type") +
ggplot2::scale_color_manual(values = unlist(color_markers),
breaks = names(color_markers)) +
ggplot2::labs(title = sample_name,
subtitle = paste0(ncol(sobj), " cells")) +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1, widths = c(6, 1))
We annotate cells for cell cycle phase using Seurat and
cyclone.
cc_columns = aquarius::add_cell_cycle(sobj = sobj,
assay = "RNA",
species_rdx = "hs",
BPPARAM = cl)@meta.data[, c("Seurat.Phase", "Phase")]
##
## G1 G2M S
## 812 466 176
sobj$Seurat.Phase = cc_columns$Seurat.Phase
sobj$cyclone.Phase = cc_columns$Phase
table(sobj$Seurat.Phase, sobj$cyclone.Phase)
##
## G1 G2M S
## G1 500 258 108
## G2M 101 80 16
## S 201 109 50
## Undecided 10 19 2
(Time to run : 57.17 s)
We visualize cell cycle on the projection :
plot_list = list()
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "Seurat.Phase") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "Seurat.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cyclone.Phase") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "cyclone.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1)
In this section, we look at the number of genes expressed by each cell, the number of UMI, the percentage of mitochondrial genes expressed, and the percentage of ribosomal genes expressed. Then, without taking into account the cells expressing low number of genes or have low number of UMI, we identify doublet cells.
We compute four quality metrics :
sobj = Seurat::PercentageFeatureSet(sobj, pattern = "^MT", col.name = "percent.mt")
sobj = Seurat::PercentageFeatureSet(sobj, pattern = "^RP[L|S][0-9]*$", col.name = "percent.rb")
sobj$log_nCount_RNA = log(sobj$nCount_RNA)
head(sobj@meta.data)
## orig.ident nCount_RNA nFeature_RNA project_name sample_identifier
## GAGCCGATGTGC GSM3717034 5722 1680 GSM3717034 Takahashi_HD_1
## AGTCCATTGCTN GSM3717034 3287 1054 GSM3717034 Takahashi_HD_1
## ATATTATTGGCA GSM3717034 1914 894 GSM3717034 Takahashi_HD_1
## GCCCTACGATCG GSM3717034 1829 846 GSM3717034 Takahashi_HD_1
## AACCAATTAGCT GSM3717034 1633 785 GSM3717034 Takahashi_HD_1
## AGCTGTGTGGCT GSM3717034 2910 1058 GSM3717034 Takahashi_HD_1
## sample_type score_CD4_T_cells score_CD8_T_cells
## GAGCCGATGTGC Takahashi_HD -0.036368128 -0.03032228
## AGTCCATTGCTN Takahashi_HD -0.018399124 -0.01047608
## ATATTATTGGCA Takahashi_HD -0.020065856 -0.02895131
## GCCCTACGATCG Takahashi_HD -0.007682242 -0.01400089
## AACCAATTAGCT Takahashi_HD -0.020199867 -0.02454284
## AGCTGTGTGGCT Takahashi_HD -0.028769986 -0.01862311
## score_Langerhans_cells score_macrophages score_B_cells
## GAGCCGATGTGC -0.02434058 -0.08960526 -0.19349544
## AGTCCATTGCTN -0.02928325 -0.04330710 -0.11435294
## ATATTATTGGCA -0.03322217 -0.04723650 -0.13033956
## GCCCTACGATCG -0.02348157 -0.04200266 -0.12012804
## AACCAATTAGCT -0.02881340 -0.03686086 -0.08707183
## AGCTGTGTGGCT -0.02306607 -0.06475252 -0.13403591
## score_cuticle score_cortex score_medulla score_IRS
## GAGCCGATGTGC -0.1689950 -0.4718040437 -0.11920058 0.11574183
## AGTCCATTGCTN -0.1253240 -0.4075978780 -0.04888838 0.14319348
## ATATTATTGGCA -0.1341447 -0.0001220687 0.90254786 0.82817744
## GCCCTACGATCG -0.1287068 -0.4274122086 -0.05804583 0.20253397
## AACCAATTAGCT -0.1057634 -0.1250109982 -0.04030250 -0.09498712
## AGCTGTGTGGCT -0.1576319 -0.4133027426 -0.05100236 0.30418639
## score_proliferative score_IBL score_ORS score_IFE
## GAGCCGATGTGC -0.09259052 0.19457031 0.073112808 1.1958267
## AGTCCATTGCTN -0.05028636 -0.07311066 -0.178796294 1.4241773
## ATATTATTGGCA -0.06190647 0.10088608 -0.165343831 -0.3758492
## GCCCTACGATCG -0.05787847 -0.05519282 -0.009003299 1.0639101
## AACCAATTAGCT 0.11990736 -0.03063805 0.030348515 0.9579954
## AGCTGTGTGGCT 0.03287891 0.12598597 -0.004139219 1.3349971
## score_HFSC score_melanocytes score_sebocytes cell_type
## GAGCCGATGTGC -0.21816220 -0.3254230 -0.2337687 IFE
## AGTCCATTGCTN -0.25926402 -0.3536076 -0.1231439 IFE
## ATATTATTGGCA -0.07092577 -0.3363119 -0.1203773 IRS
## GCCCTACGATCG -0.03607841 -0.3337974 -0.1184161 IFE
## AACCAATTAGCT 0.01357552 -0.2995709 -0.1120403 IFE
## AGCTGTGTGGCT -0.06299204 -0.3595476 -0.1575885 IFE
## Seurat.Phase cyclone.Phase percent.mt percent.rb log_nCount_RNA
## GAGCCGATGTGC G1 G1 0.8563439 18.94443 8.652074
## AGTCCATTGCTN G1 G1 0.4867660 18.52753 8.097731
## ATATTATTGGCA S G1 2.2988506 17.97283 7.556951
## GCCCTACGATCG G1 G1 1.0934937 20.55768 7.511525
## AACCAATTAGCT S G1 2.2657685 19.71831 7.398174
## AGCTGTGTGGCT G1 G1 2.5085911 16.94158 7.975908
We get the cell barcodes for the failing cells :
fail_percent.mt = sobj@meta.data %>% dplyr::filter(percent.mt > cut_percent.mt) %>% rownames()
fail_percent.rb = sobj@meta.data %>% dplyr::filter(percent.rb > cut_percent.rb) %>% rownames()
fail_log_nCount_RNA = sobj@meta.data %>% dplyr::filter(log_nCount_RNA < cut_log_nCount_RNA) %>% rownames()
fail_nFeature_RNA = sobj@meta.data %>% dplyr::filter(nFeature_RNA < cut_nFeature_RNA) %>% rownames()
Without taking into account the low UMI and low number of features cells, we identify doublets.
fsobj = subset(sobj, invert = TRUE,
cells = unique(c(fail_log_nCount_RNA, fail_nFeature_RNA)))
fsobj
## An object of class Seurat
## 12060 features across 2439 samples within 1 assay
## Active assay: RNA (12060 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
On this filtered dataset, we apply doublet cells detection. Just before, we run the normalization, taking into account only the remaining cells.
fsobj = Seurat::NormalizeData(fsobj,
normalization.method = "LogNormalize",
assay = "RNA")
fsobj = Seurat::FindVariableFeatures(fsobj,
assay = "RNA",
nfeatures = 3000)
fsobj
## An object of class Seurat
## 12060 features across 2439 samples within 1 assay
## Active assay: RNA (12060 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We identify doublet cells :
fsobj = aquarius::find_doublets(sobj = fsobj,
BPPARAM = cl)
## [1] 12060 2439
##
## FALSE TRUE
## 1956 483
## [12:02:06] WARNING: amalgamation/../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
##
## FALSE TRUE
## 2367 72
##
## FALSE TRUE
## 1932 507
fail_doublets_consensus = Seurat::WhichCells(fsobj, expression = doublets_consensus.class)
fail_doublets_scDblFinder = Seurat::WhichCells(fsobj, expression = scDblFinder.class)
fail_doublets_hybrid = Seurat::WhichCells(fsobj, expression = hybrid_score.class)
(Time to run : 33.81 s)
We add the information in the non filtered Seurat object :
sobj$doublets_consensus.class = dplyr::case_when(!(colnames(sobj) %in% colnames(fsobj)) ~ NA,
colnames(sobj) %in% fail_doublets_consensus ~ TRUE,
!(colnames(sobj) %in% fail_doublets_consensus) ~ FALSE)
sobj$scDblFinder.class = dplyr::case_when(!(colnames(sobj) %in% colnames(fsobj)) ~ NA,
colnames(sobj) %in% fail_doublets_scDblFinder ~ TRUE,
!(colnames(sobj) %in% fail_doublets_scDblFinder) ~ FALSE)
sobj$hybrid_score.class = dplyr::case_when(!(colnames(sobj) %in% colnames(fsobj)) ~ NA,
colnames(sobj) %in% fail_doublets_hybrid ~ TRUE,
!(colnames(sobj) %in% fail_doublets_hybrid) ~ FALSE)
We can visualize the 4 cells quality with a Venn diagram :
n_filtered = c(fail_percent.mt, fail_percent.rb, fail_log_nCount_RNA, fail_nFeature_RNA) %>%
unique() %>% length()
percent_filtered = round(100*(n_filtered/ncol(sobj)), 2)
ggvenn::ggvenn(list(percent.mt = fail_percent.mt,
percent.rb = fail_percent.rb,
log_nCount_RNA = fail_log_nCount_RNA,
nFeature_RNA = fail_nFeature_RNA),
fill_color = c("#0073C2FF", "#EFC000FF", "orange", "pink"),
stroke_size = 0.5, set_name_size = 4) +
ggplot2::labs(title = "Filtered out cells",
subtitle = paste0(n_filtered, " cells (", percent_filtered, " % of all cells)")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
To visualize the threshold for number of UMI, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "log_nCount_RNA",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_log_nCount_RNA)
Seurat::VlnPlot(sobj, features = "log_nCount_RNA", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_log_nCount_RNA, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_log_nCount_RNA,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "log_nCount_RNA",
subtitle = paste0(length(fail_log_nCount_RNA), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
To visualize the threshold for number of features, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "nFeature_RNA",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_nFeature_RNA)
Seurat::VlnPlot(sobj, features = "nFeature_RNA", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_nFeature_RNA, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_nFeature_RNA,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "nFeature_RNA",
subtitle = paste0(length(fail_nFeature_RNA), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
To identify a threshold for mitochondrial gene expression, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "percent.mt",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_percent.mt)
Seurat::VlnPlot(sobj, features = "percent.mt", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_percent.mt, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_percent.mt,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "percent.mt",
subtitle = paste0(length(fail_percent.mt), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
To identify a threshold for ribosomal gene expression, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "percent.rb",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_percent.rb)
Seurat::VlnPlot(sobj, features = "percent.rb", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_percent.rb, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_percent.rb,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "percent.rb",
subtitle = paste0(length(fail_percent.rb), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
We would like to see if the number of feature expressed by cell, and
the number of UMI is correlated with the cell type, the percentage of
mitochondrial and ribosomal gene expressed, and the doublet status. We
build the log_nCount_RNA by nFeature_RNA
figure, where cells (dots) are colored by these different metrics.
This is the figure, colored by cell type :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "cell_type",
col_colors = unname(color_markers),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the percentage of mitochondrial genes expressed in cell :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "percent.mt",
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the percentage of ribosomal genes expressed in cell :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "percent.rb",
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the doublet cells status
(doublets_consensus.class) :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "doublets_consensus.class",
col_colors = setNames(nm = c(TRUE, FALSE),
aquarius::gg_color_hue(2)),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the doublet cells status
(scDblFinder.class) :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "scDblFinder.class",
col_colors = setNames(nm = c(TRUE, FALSE),
aquarius::gg_color_hue(2)),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the doublet cells status
(hybrid_score.class) :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "hybrid_score.class",
col_colors = setNames(nm = c(TRUE, FALSE),
aquarius::gg_color_hue(2)),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
Do filtered cells belong to a particular cell type ?
sobj$all_cells = TRUE
plot_list = list()
## All cells
df = sobj@meta.data
if (nrow(df) == 0) {
plot_list[[1]] = ggplot()
} else {
plot_list[[1]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = "All cells",
subtitle = paste(nrow(df), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## Doublets consensus
df = sobj@meta.data %>%
dplyr::filter(doublets_consensus.class)
if (nrow(df) == 0) {
plot_list[[2]] = ggplot()
} else {
plot_list[[2]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = "doublets_consensus.class",
subtitle = paste(sum(sobj$doublets_consensus.class, na.rm = TRUE), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## percent.mt
df = sobj@meta.data %>%
dplyr::filter(percent.mt > cut_percent.mt)
if (nrow(df) == 0) {
plot_list[[3]] = ggplot()
} else {
plot_list[[3]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("percent.mt >", cut_percent.mt),
subtitle = paste(length(fail_percent.mt), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## percent.rb
df = sobj@meta.data %>%
dplyr::filter(percent.rb > cut_percent.rb)
if (nrow(df) == 0) {
plot_list[[4]] = ggplot()
} else {
plot_list[[4]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("percent.rb >", cut_percent.rb),
subtitle = paste(length(fail_percent.rb), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## log_nCount_RNA
df = sobj@meta.data %>%
dplyr::filter(log_nCount_RNA < cut_log_nCount_RNA)
if (nrow(df) == 0) {
plot_list[[5]] = ggplot()
} else {
plot_list[[5]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("log_nCount_RNA <", round(cut_log_nCount_RNA, 2)),
subtitle = paste(length(fail_log_nCount_RNA), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## nFeature_RNA
df = sobj@meta.data %>%
dplyr::filter(nFeature_RNA < cut_nFeature_RNA)
if (nrow(df) == 0) {
plot_list[[6]] = ggplot()
} else {
plot_list[[6]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("nFeature_RNA <", round(cut_nFeature_RNA, 2)),
subtitle = paste(length(fail_nFeature_RNA), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
patchwork::wrap_plots(plot_list, ncol = 3) +
patchwork::plot_layout(guides = "collect") &
ggplot2::theme(legend.position = "right")
We can compare doublet detection methods with a Venn diagram :
ggvenn::ggvenn(list(hybrid = fail_doublets_hybrid,
scDblFinder = fail_doublets_scDblFinder),
fill_color = c("#0073C2FF", "#EFC000FF"),
stroke_size = 0.5, set_name_size = 4) +
ggplot2::ggtitle(label = "Doublet cells") +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"))
We visualize cells annotation for doublets :
plot_list = list()
# scDblFinder.class
sobj$fail = ifelse(sobj$scDblFinder.class,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "fail",
na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "scDblFinder.class",
subtitle = paste0(sum(sobj$scDblFinder.class, na.rm = TRUE), " cells")) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
# hybrid_score.class
sobj$fail = ifelse(sobj$hybrid_score.class,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "fail",
na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "hybrid_score.class",
subtitle = paste0(sum(sobj$hybrid_score.class, na.rm = TRUE), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
sobj$fail = NULL
# Plot
patchwork::wrap_plots(plot_list, nrow = 1)
What is the composition of doublet cells ? We just look at score for each cell type.
sobj$orig.ident.doublets = case_when(is.na(sobj$doublets_consensus.class) ~ "bad quality",
sobj$doublets_consensus.class == TRUE ~ paste0(sobj$orig.ident, " doublets"),
sobj$doublets_consensus.class == FALSE ~ "not doublet")
sobj$orig.ident.doublets = factor(sobj$orig.ident.doublets,
levels = c(paste0(as.character(sample_info$sample_identifiant), " doublets"),
"bad quality", "not doublet"))
doublets_compo = function(score1, score2) {
type1 = unlist(lapply(stringr::str_split(score1, pattern = "score_"), `[[`, 2))
type2 = unlist(lapply(stringr::str_split(score2, pattern = "score_"), `[[`, 2))
if (type1 == type2) {
the_title = "Homotypic doublet"
the_subtitle = type1
score1 = "log_nCount_RNA"
} else {
the_title = "Heterotypic doublet"
the_subtitle = paste(type1, type2, sep = " + ")
}
p = sobj@meta.data %>%
dplyr::arrange(desc(orig.ident.doublets)) %>%
ggplot2::ggplot(., aes(x = eval(parse(text = score1)),
y = eval(parse(text = score2)),
col = orig.ident.doublets)) +
ggplot2::geom_point(size = 0.25) +
ggplot2::scale_color_manual(values = c(sample_info$color, "gray90", "gray60"),
breaks = c(paste0(as.character(sample_info$sample_identifiant), " doublets"),
"bad quality", "not doublet")) +
ggplot2::labs(x = score1, y = score2,
title = the_title, subtitle = the_subtitle) +
ggplot2::theme_classic() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
return(p)
}
score_columns = grep(x = colnames(sobj@meta.data),
pattern = "^score",
value = TRUE)
combinations = expand.grid(score_columns, score_columns) %>%
apply(., 1, sort) %>% t() %>%
as.data.frame()
combinations = combinations[!duplicated(combinations), ]
plot_list = apply(combinations, 1, FUN = function(elem) {
doublets_compo(elem[1], elem[2])
})
sobj$orig.ident.doublets = NULL
patchwork::wrap_plots(plot_list, ncol = 4) +
patchwork::plot_layout(guides = "collect") &
ggplot2::theme(legend.position = "right")
We could save this object before filtering (remove
eval = FALSE) :
saveRDS(sobj, paste0(out_dir, "/datasets/", sample_name, "_sobj_unfiltered.rds"))
We remove :
Note: We do not filter cells detected as doublets. Indeed, few genes and transcripts are detected per cell, and the best cells are therefore annotated as doublets.
sobj = subset(sobj, invert = TRUE,
cells = unique(c(fail_log_nCount_RNA, fail_nFeature_RNA,
fail_percent.mt, fail_percent.rb)))
sobj
## An object of class Seurat
## 12060 features across 2421 samples within 1 assay
## Active assay: RNA (12060 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We normalize the count matrix for remaining cells :
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize",
assay = "RNA")
sobj = Seurat::FindVariableFeatures(sobj,
assay = "RNA",
nfeatures = 3000)
sobj
## An object of class Seurat
## 12060 features across 2421 samples within 1 assay
## Active assay: RNA (12060 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We perform a PCA :
sobj = aquarius::dimensions_reduction(sobj = sobj,
assay = "RNA",
reduction = "pca",
max_dims = 100,
verbose = FALSE)
Seurat::ElbowPlot(sobj, ndims = 100, reduction = "RNA_pca")
We generate a tSNE and a UMAP with 20 principal components :
ndims = 20
sobj = Seurat::RunTSNE(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_tsne"),
check_duplicates = FALSE)
sobj = Seurat::RunUMAP(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_umap"))
We annotate cells for cell type, with the new normalized expression matrix :
score_columns = grep(x = colnames(sobj@meta.data), pattern = "^score", value = TRUE)
sobj@meta.data[, score_columns] = NULL
sobj$cell_type = NULL
sobj = aquarius::cell_annot_custom(sobj,
newname = "cell_type",
markers = cell_markers,
use_negative = TRUE,
add_score = TRUE,
verbose = TRUE)
sobj$cell_type = factor(sobj$cell_type, levels = names(cell_markers))
colnames(sobj@meta.data) = stringr::str_replace_all(string = colnames(sobj@meta.data),
pattern = " ",
replacement = "_")
table(sobj$cell_type)
##
## CD4 T cells CD8 T cells Langerhans cells macrophages
## 36 260 97 156
## B cells cuticle cortex medulla
## 112 139 139 120
## IRS proliferative IBL ORS
## 44 41 141 102
## IFE HFSC melanocytes sebocytes
## 456 314 93 171
(Time to run : 0.98 s)
To justify cell type annotation, we can make a dotplot :
markers = c("PTPRC", unique(unlist(dotplot_markers[levels(sobj$cell_type)])))
markers = markers[markers %in% rownames(sobj)]
aquarius::plot_dotplot(sobj, assay = "RNA",
column_name = "cell_type",
markers = markers,
nb_hline = 0) +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(legend.position = "right",
legend.box = "vertical",
legend.direction = "vertical",
axis.title = element_blank(),
axis.text = element_text(size = 15))
We can make a barplot to see the composition of each dataset, and visualize cell types on the projection.
df_proportion = as.data.frame(prop.table(table(sobj$orig.ident,
sobj$cell_type)))
colnames(df_proportion) = c("orig.ident", "cell_type", "freq")
quantif = table(sobj$orig.ident) %>%
as.data.frame.table() %>%
`colnames<-`(c("orig.ident", "nb_cells"))
# Plot
plot_list = list()
plot_list[[2]] = aquarius::plot_barplot(df = df_proportion,
x = "orig.ident",
y = "freq",
fill = "cell_type",
position = ggplot2::position_fill()) +
ggplot2::scale_fill_manual(name = "Cell type",
values = color_markers[levels(df_proportion$cell_type)],
breaks = levels(df_proportion$cell_type)) +
ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
aes(x = orig.ident, y = 1.05, label = nb_cells),
label.size = 0)
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = "RNA_pca_20_tsne") +
ggplot2::scale_color_manual(values = unlist(color_markers),
breaks = names(color_markers)) +
ggplot2::labs(title = sample_name,
subtitle = paste0(ncol(sobj), " cells")) +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1, widths = c(6, 1))
We annotate cells for cell cycle phase :
cc_columns = aquarius::add_cell_cycle(sobj = sobj,
assay = "RNA",
species_rdx = "hs",
BPPARAM = cl)@meta.data[, c("Seurat.Phase", "Phase")]
##
## G1 G2M S
## 784 425 164
sobj$Seurat.Phase = cc_columns$Seurat.Phase
sobj$cyclone.Phase = cc_columns$Phase
table(sobj$Seurat.Phase, sobj$cyclone.Phase)
##
## G1 G2M S
## G1 478 234 105
## G2M 107 77 22
## S 185 99 37
## Undecided 14 15 0
(Time to run : 49.21 s)
We visualize cell cycle on the projection :
plot_list = list()
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = "RNA_pca_20_tsne") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "Seurat.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = "RNA_pca_20_tsne") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "cyclone.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1)
We make a highly resolutive clustering :
sobj = Seurat::FindNeighbors(sobj, reduction = "RNA_pca", dims = c(1:ndims))
sobj = Seurat::FindClusters(sobj, resolution = 2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2421
## Number of edges: 103171
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.5082
## Number of communities: 14
## Elapsed time: 0 seconds
table(sobj$seurat_clusters)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 328 295 272 268 248 176 153 146 143 136 114 60 42 40
We can visualize the cell type :
tsne = Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = paste0("RNA_pca_", ndims, "_tsne"), cols = color_markers) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = paste0("RNA_pca_", ndims, "_umap"), cols = color_markers) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We can visualize the cell cycle, from Seurat :
tsne = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We can visualize the cell cycle, from cyclone :
tsne = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We visualize the clustering :
tsne = Seurat::DimPlot(sobj, group.by = "seurat_clusters", label = TRUE,
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "seurat_clusters", label = TRUE,
reduction = paste0("RNA_pca_", ndims, "_umap")) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We visualize all cell types markers on the tSNE :
markers = dotplot_markers %>% unlist() %>% unname()
markers = markers[markers %in% rownames(sobj)]
plot_list = lapply(markers,
FUN = function(one_gene) {
p = Seurat::FeaturePlot(sobj, features = one_gene,
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
ggplot2::labs(title = one_gene) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
ggplot2::theme(aspect.ratio = 1,
plot.subtitle = element_text(hjust = 0.5)) +
Seurat::NoAxes()
return(p)
})
patchwork::wrap_plots(plot_list, ncol = 4)
We save the annotated and filtered Seurat object :
saveRDS(sobj, file = paste0(out_dir, "/datasets/", sample_name, "_sobj_filtered.rds"))
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
##
## locale:
## [1] C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_3.3.5 patchwork_1.1.2 dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] softImpute_1.4 graphlayouts_0.7.0
## [3] pbapply_1.4-2 lattice_0.20-41
## [5] haven_2.3.1 vctrs_0.3.8
## [7] usethis_2.0.1 dynwrap_1.2.1
## [9] blob_1.2.1 survival_3.2-13
## [11] prodlim_2019.11.13 dynutils_1.0.5
## [13] later_1.3.0 DBI_1.1.1
## [15] R.utils_2.11.0 SingleCellExperiment_1.8.0
## [17] rappdirs_0.3.3 uwot_0.1.8
## [19] dqrng_0.2.1 jpeg_0.1-8.1
## [21] zlibbioc_1.32.0 pspline_1.0-18
## [23] pcaMethods_1.78.0 mvtnorm_1.1-1
## [25] htmlwidgets_1.5.4 GlobalOptions_0.1.2
## [27] future_1.22.1 UpSetR_1.4.0
## [29] laeken_0.5.2 leiden_0.3.3
## [31] clustree_0.4.3 parallel_3.6.3
## [33] scater_1.14.6 irlba_2.3.3
## [35] DEoptimR_1.0-9 tidygraph_1.1.2
## [37] Rcpp_1.0.9 readr_2.0.2
## [39] KernSmooth_2.23-17 carrier_0.1.0
## [41] promises_1.1.0 gdata_2.18.0
## [43] DelayedArray_0.12.3 limma_3.42.2
## [45] graph_1.64.0 RcppParallel_5.1.4
## [47] Hmisc_4.4-0 fs_1.5.2
## [49] RSpectra_0.16-0 fastmatch_1.1-0
## [51] ranger_0.12.1 digest_0.6.25
## [53] png_0.1-7 sctransform_0.2.1
## [55] cowplot_1.0.0 DOSE_3.12.0
## [57] ggvenn_0.1.9 here_1.0.1
## [59] TInGa_0.0.0.9000 ggraph_2.0.3
## [61] pkgconfig_2.0.3 GO.db_3.10.0
## [63] DelayedMatrixStats_1.8.0 gower_0.2.1
## [65] ggbeeswarm_0.6.0 iterators_1.0.12
## [67] DropletUtils_1.6.1 reticulate_1.26
## [69] clusterProfiler_3.14.3 SummarizedExperiment_1.16.1
## [71] circlize_0.4.15 beeswarm_0.4.0
## [73] GetoptLong_1.0.5 xfun_0.35
## [75] bslib_0.3.1 zoo_1.8-10
## [77] tidyselect_1.1.0 reshape2_1.4.4
## [79] purrr_0.3.4 ica_1.0-2
## [81] pcaPP_1.9-73 viridisLite_0.3.0
## [83] rtracklayer_1.46.0 rlang_1.0.2
## [85] hexbin_1.28.1 jquerylib_0.1.4
## [87] dyneval_0.9.9 glue_1.4.2
## [89] RColorBrewer_1.1-2 matrixStats_0.56.0
## [91] stringr_1.4.0 lava_1.6.7
## [93] europepmc_0.3 DESeq2_1.26.0
## [95] recipes_0.1.17 labeling_0.3
## [97] httpuv_1.5.2 class_7.3-17
## [99] BiocNeighbors_1.4.2 DO.db_2.9
## [101] annotate_1.64.0 jsonlite_1.7.2
## [103] XVector_0.26.0 bit_4.0.4
## [105] mime_0.9 aquarius_0.1.5
## [107] Rsamtools_2.2.3 gridExtra_2.3
## [109] gplots_3.0.3 stringi_1.4.6
## [111] processx_3.5.2 gsl_2.1-6
## [113] bitops_1.0-6 cli_3.0.1
## [115] batchelor_1.2.4 RSQLite_2.2.0
## [117] randomForest_4.6-14 tidyr_1.1.4
## [119] data.table_1.14.2 rstudioapi_0.13
## [121] org.Mm.eg.db_3.10.0 GenomicAlignments_1.22.1
## [123] nlme_3.1-147 qvalue_2.18.0
## [125] scran_1.14.6 locfit_1.5-9.4
## [127] scDblFinder_1.1.8 listenv_0.8.0
## [129] ggthemes_4.2.4 gridGraphics_0.5-0
## [131] R.oo_1.24.0 dbplyr_1.4.4
## [133] BiocGenerics_0.32.0 TTR_0.24.2
## [135] readxl_1.3.1 lifecycle_1.0.1
## [137] timeDate_3043.102 ggpattern_0.3.1
## [139] munsell_0.5.0 cellranger_1.1.0
## [141] R.methodsS3_1.8.1 proxyC_0.1.5
## [143] visNetwork_2.0.9 caTools_1.18.0
## [145] codetools_0.2-16 Biobase_2.46.0
## [147] GenomeInfoDb_1.22.1 vipor_0.4.5
## [149] lmtest_0.9-38 msigdbr_7.5.1
## [151] htmlTable_1.13.3 triebeard_0.3.0
## [153] lsei_1.2-0 xtable_1.8-4
## [155] ROCR_1.0-7 BiocManager_1.30.10
## [157] scatterplot3d_0.3-41 abind_1.4-5
## [159] farver_2.0.3 parallelly_1.28.1
## [161] RANN_2.6.1 askpass_1.1
## [163] GenomicRanges_1.38.0 RcppAnnoy_0.0.16
## [165] tibble_3.1.5 ggdendro_0.1-20
## [167] cluster_2.1.0 future.apply_1.5.0
## [169] Seurat_3.1.5 dendextend_1.15.1
## [171] Matrix_1.3-2 ellipsis_0.3.2
## [173] prettyunits_1.1.1 lubridate_1.7.9
## [175] ggridges_0.5.2 igraph_1.2.5
## [177] RcppEigen_0.3.3.7.0 fgsea_1.12.0
## [179] remotes_2.4.2 scBFA_1.0.0
## [181] destiny_3.0.1 VIM_6.1.1
## [183] testthat_3.1.0 htmltools_0.5.2
## [185] BiocFileCache_1.10.2 yaml_2.2.1
## [187] utf8_1.1.4 plotly_4.9.2.1
## [189] XML_3.99-0.3 ModelMetrics_1.2.2.2
## [191] e1071_1.7-3 foreign_0.8-76
## [193] withr_2.5.0 fitdistrplus_1.0-14
## [195] BiocParallel_1.20.1 xgboost_1.4.1.1
## [197] bit64_4.0.5 foreach_1.5.0
## [199] robustbase_0.93-9 Biostrings_2.54.0
## [201] GOSemSim_2.13.1 rsvd_1.0.3
## [203] memoise_2.0.0 evaluate_0.18
## [205] forcats_0.5.0 rio_0.5.16
## [207] geneplotter_1.64.0 tzdb_0.1.2
## [209] caret_6.0-86 ps_1.6.0
## [211] DiagrammeR_1.0.6.1 curl_4.3
## [213] fdrtool_1.2.15 fansi_0.4.1
## [215] highr_0.8 urltools_1.7.3
## [217] xts_0.12.1 GSEABase_1.48.0
## [219] acepack_1.4.1 edgeR_3.28.1
## [221] checkmate_2.0.0 scds_1.2.0
## [223] cachem_1.0.6 npsurv_0.4-0
## [225] babelgene_22.3 rjson_0.2.20
## [227] openxlsx_4.1.5 ggrepel_0.9.1
## [229] clue_0.3-60 rprojroot_2.0.2
## [231] stabledist_0.7-1 tools_3.6.3
## [233] sass_0.4.0 nichenetr_1.1.1
## [235] magrittr_2.0.1 RCurl_1.98-1.2
## [237] proxy_0.4-24 car_3.0-11
## [239] ape_5.3 ggplotify_0.0.5
## [241] xml2_1.3.2 httr_1.4.2
## [243] assertthat_0.2.1 rmarkdown_2.18
## [245] boot_1.3-25 globals_0.14.0
## [247] R6_2.4.1 Rhdf5lib_1.8.0
## [249] nnet_7.3-14 RcppHNSW_0.2.0
## [251] progress_1.2.2 genefilter_1.68.0
## [253] statmod_1.4.34 gtools_3.8.2
## [255] shape_1.4.6 HDF5Array_1.14.4
## [257] BiocSingular_1.2.2 rhdf5_2.30.1
## [259] splines_3.6.3 AUCell_1.8.0
## [261] carData_3.0-4 colorspace_1.4-1
## [263] generics_0.1.0 stats4_3.6.3
## [265] base64enc_0.1-3 dynfeature_1.0.0
## [267] smoother_1.1 gridtext_0.1.1
## [269] pillar_1.6.3 tweenr_1.0.1
## [271] sp_1.4-1 ggplot.multistats_1.0.0
## [273] rvcheck_0.1.8 GenomeInfoDbData_1.2.2
## [275] plyr_1.8.6 gtable_0.3.0
## [277] zip_2.2.0 knitr_1.41
## [279] ComplexHeatmap_2.14.0 latticeExtra_0.6-29
## [281] biomaRt_2.42.1 IRanges_2.20.2
## [283] fastmap_1.1.0 ADGofTest_0.3
## [285] copula_1.0-0 doParallel_1.0.15
## [287] AnnotationDbi_1.48.0 vcd_1.4-8
## [289] babelwhale_1.0.1 openssl_1.4.1
## [291] scales_1.1.1 backports_1.2.1
## [293] S4Vectors_0.24.4 ipred_0.9-12
## [295] enrichplot_1.6.1 hms_1.1.1
## [297] ggforce_0.3.1 Rtsne_0.15
## [299] shiny_1.7.1 numDeriv_2016.8-1.1
## [301] polyclip_1.10-0 grid_3.6.3
## [303] lazyeval_0.2.2 Formula_1.2-3
## [305] tsne_0.1-3 crayon_1.3.4
## [307] MASS_7.3-54 pROC_1.16.2
## [309] viridis_0.5.1 dynparam_1.0.0
## [311] rpart_4.1-15 zinbwave_1.8.0
## [313] compiler_3.6.3 ggtext_0.1.0